* ---- Session setup ----
* Drop any dataset currently in memory
clear
* Close a log file if one is open; -capture- swallows the error when none is
capture log close
* Do not pause output at --more-- prompts
set more off
* Request 500m of memory for data
* NOTE(review): -set mem- is presumably only meaningful on older Stata releases -- confirm target version
set mem 500m
* Remove any matrices left over from earlier work
clear matrix



********* Step 1: prepare region population data **********

* Load the location-level population data
use data_pt_pop_1891, clear

*** Treat Salford as part of Manchester: relabel it, then add up pop within
*** each location-year (the counts before and after show the rows merged away)
replace loc="manchester" if loc=="salford"
count
collapse (sum) pop, by(loc year)
count

*** Attach region identifiers from data_regions (old-style match-merge on loc,
*** so the master is sorted on the key first); keep matched locations only
sort loc
merge loc using data_regions
tabulate _merge
drop if _merge!=3
drop _merge

*** Aggregate population to region-year level; region2 is the grouping
*** variable and is renamed to region only after the collapse
collapse (sum) pop, by(region2 year)
rename region2 region

** Store sorted by the merge keys (region year) used when this file is
** merged back in later, then empty memory
sort region year
save temp_prepared_region_pop_data, replace
clear



********* Step 2: prepare industry data I**********
* Builds, for every region-year, the share of employment in each aggregated
* industry category and leaves the result in memory in wide form
* (one row per region-year, one emp_shr<category> variable per category).
use data_city_industry_data_1851_1891, clear

*** Merge to regions (old-style match-merge on loc; master sorted on the key
*** first). Only locations present in both files are kept.
sort loc
merge loc using data_regions
tab _merge
keep if _merge==3
drop _merge

*** Combine into more aggregated categories
* Each line relabels every group_2 code belonging to one aggregated category.
replace group_2_name="textiles"          if inlist(group_2, 1614, 1607, 1605, 1310, 1302)
replace group_2_name="transport"         if inlist(group_2, 801, 802, 809)
replace group_2_name="trade_service"     if inlist(group_2, 702, 811)
replace group_2_name="drinks_food"       if inlist(group_2, 1201, 1404)
replace group_2_name="misc_manufactures" if inlist(group_2, 1410, 1509, 1907)
replace group_2_name="chemicals_oils"    if inlist(group_2, 310, 1206)

* Shorten the one long category label; group_2_name later becomes the suffix
* of the reshaped emp_shr* variable names
replace group_2_name="shopkeepers" if group_2_name=="shopkeepers_salesmen_etc"


** Collapse employment by region year and industry category
* region2 is the grouping variable; it is renamed to region after the collapse
collapse (sum) pop, by(region2 year group_2_name)
rename region2 region

*** Generate industry employment shares by region
* total() is the documented egen group-sum function; the older name sum() is
* easily confused with the running-sum function sum() used by -generate-
bysort region year: egen totpop=total(pop)
gen emp_shr=pop/totpop

** Prepare to merge into population data
* Go wide: one row per region-year, one emp_shr<category> column per category
keep region year group_2_name emp_shr
reshape wide emp_shr, i(region year) j(group_2_name) string




****************** Step 3: Combine ****************
* Master: the wide industry-share data left in memory by Step 2.
* Using:  the region-year population file saved earlier, already sorted by
*         region and year.
* Old-style match-merge on region and year, so the master is sorted on the
* keys first.
sort region year
merge region year using temp_prepared_region_pop_data
* Report match status only: no observations are dropped here and _merge
* remains in the dataset.
tab _merge




************ Step 4: prepare for regressions ****************
* Numeric id for the string region name, then declare the region-year panel
encode region, generate(region_code)
tsset region_code year
order region region_code year

* Growth rates: proportional change in pop relative to the previous
* observation of the same region; missing for each region's first observation
bysort region (year): gen growth=(pop-pop[_n-1])/pop[_n-1] if _n>1
order region region_code year pop growth

gen log_pop=ln(pop)

* Accumulators for the Step 5 runs. Each starts as a single placeholder column
* and gets the results of every run appended as further columns.
matrix Z = J(6,1,.)
matrix RMSPE = J(1,1,0)
matrix X = J(20,1,.)
matrix W = J(8,1,.)

**** Write the region-name / numeric-code lookup table
preserve
keep region region_code
* one row per region, ordered by its numeric code
bysort region_code region: keep if _n==1
gen code=_n
drop region_code
outsheet using results_figure_4_synthetic_control_region_codes.csv, comma names replace
restore



************ Step 5: Synthetic control regressions -- Log population level ***************

* 1851 industry employment shares used as predictors in every run
local shr1851 emp_shrbuilder(1851) emp_shrchemicals_oils(1851) emp_shrdress(1851) emp_shrdrinks_food(1851) emp_shrearthenware_bricks(1851) emp_shrleather_hair_etc(1851) emp_shrmetal_manuf(1851) emp_shrmining_related(1851) emp_shrmisc_manufactures(1851) emp_shrprofessionals(1851) emp_shrpaper_etc(1851) emp_shrservice(1851) emp_shrships(1851) emp_shrshopkeepers(1851) emp_shrtextiles(1851) emp_shrtrade_service(1851) emp_shrtransport(1851) emp_shrvehicles(1851) emp_shrwater_gas_service(1851)

* One run per panel unit 1-9, each passed in turn as trunit()
forvalues i = 1/9 {

    synth log_pop `shr1851' pop, trunit(`i') trperiod(1871) xperiod(1851(10)1861) nested fig

    * Pull this run's stored results and append them as new columns
    matrix run_treated = e(Y_treated)
    matrix run_synth = e(Y_synthetic)
    matrix Z = Z, run_treated, run_synth

    matrix run_bal = e(X_balance)
    matrix X = X, run_bal

    matrix run_wts = e(W_weights)
    matrix W = W, run_wts

    matrix run_fit = e(RMSPE)
    matrix RMSPE = RMSPE, run_fit

    * Discard the per-run scratch matrices before the next iteration
    matrix drop run_treated run_synth run_fit run_bal run_wts

}


* Write each accumulated result matrix to its own CSV. The panel is set aside
* with -preserve- because every export needs an otherwise empty dataset.
preserve

* Matrices and their output files, in matching order
local mats Z X W RMSPE
local files results_figure_4_region_pop_results.csv results_figure_4_region_balance.csv results_figure_4_region_weights.csv results_figure_4_region_RMSPE.csv

forvalues k = 1/4 {
    local m : word `k' of `mats'
    local f : word `k' of `files'
    clear
    * svmat turns the matrix columns into variables named <matrix>1, <matrix>2, ...
    svmat `m'
    outsheet using `f', comma names replace
}
clear

restore


************ Step 6: Synthetic controls regressions -- growth **********

* Remove the 1841 observations, then start over with empty accumulators
* (a single placeholder column each; every run appends further columns)
drop if year==1841
clear matrix
matrix Z = J(5,1,.)
matrix RMSPE = J(1,1,0)
matrix X = J(21,1,.)
matrix W = J(8,1,.)

* 1851 industry employment shares used as predictors in every run
local shr1851 emp_shrbuilder(1851) emp_shrchemicals_oils(1851) emp_shrdress(1851) emp_shrdrinks_food(1851) emp_shrearthenware_bricks(1851) emp_shrleather_hair_etc(1851) emp_shrmetal_manuf(1851) emp_shrmining_related(1851) emp_shrmisc_manufactures(1851) emp_shrprofessionals(1851) emp_shrpaper_etc(1851) emp_shrservice(1851) emp_shrships(1851) emp_shrshopkeepers(1851) emp_shrtextiles(1851) emp_shrtrade_service(1851) emp_shrtransport(1851) emp_shrvehicles(1851) emp_shrwater_gas_service(1851)

* One run per panel unit 1-9, each passed in turn as trunit()
forvalues i = 1/9 {

    synth growth `shr1851' growth(1851) growth(1861), trunit(`i') trperiod(1871) xperiod(1851(10)1861) nested fig

    * Pull this run's stored results and append them as new columns
    matrix run_treated = e(Y_treated)
    matrix run_synth = e(Y_synthetic)
    matrix Z = Z, run_treated, run_synth

    matrix run_bal = e(X_balance)
    matrix X = X, run_bal

    matrix run_wts = e(W_weights)
    matrix W = W, run_wts

    matrix run_fit = e(RMSPE)
    matrix RMSPE = RMSPE, run_fit

    * Discard the per-run scratch matrices before the next iteration
    matrix drop run_treated run_synth run_fit run_bal run_wts

}



* Write each accumulated growth-run matrix to its own CSV. The panel is set
* aside with -preserve- because every export needs an otherwise empty dataset;
* svmat turns the matrix columns into variables named <matrix>1, <matrix>2, ...
preserve
clear
svmat Z
outsheet using results_figure_4_region_growth_results.csv, comma names replace
clear

svmat X
outsheet using results_figure_4_region_growth_balance.csv, comma names replace
clear

svmat W
outsheet using results_figure_4_region_growth_weights.csv, comma names replace
clear

svmat RMSPE
outsheet using results_figure_4_region_growth_RMSPE.csv, comma names replace
clear

* Explicitly bring the panel back, as the Step 5 export does. Without this the
* script relies on Stata's automatic restore when the do-file ends, and any
* code appended below would start from an empty dataset with a preserve
* still pending.
restore





